library(dplyr)
library(tidyr)
library(pheatmap)
library(openxlsx)
##Pull data for analysis
Heart_raw<-read.xlsx("../1_Input/2_Protein/Heart - PN-0064 Heart - mep.xlsx", colNames = T, rowNames = F, sheet = "Cleaned_NoMVs")
Heart_raw.cleaned<-Heart_raw %>% mutate(Gene.Symbol = strsplit(as.character(Gene.Symbol), split = ";")) %>% unnest(Gene.Symbol)
Heart_raw$Gene.Symbol<-make.unique(Heart_raw$Gene.Symbol, sep = ".")
Heart_raw.complete.cleaned<-Heart_raw.cleaned[!is.na(Heart_raw.cleaned$Gene.Symbol),]
LFQs.Heart_raw<-dplyr::select(Heart_raw.complete.cleaned, contains("LFQ"))
rownames(LFQs.Heart_raw)<-make.unique(Heart_raw.complete.cleaned$Gene.Symbol, sep = ".")
Once we established that the populations under consideration truly display divergene expression patterns, we sought to determine whether unbiased global gene expression patterns recapitulate the described phenotypes within each heart failure group. To accomplish this, an unsupervised Principal Components Analysis (PCA) was initially used with normalized counts.
Before running the principal components analysis, it was necessary to first determine the number of PC’s required to account for 80% of the variance, a machine-learning algorithmm benchmark that provides sufficient confidence in the analysis.
#Plot Features of the PCA
library(dplyr)
library(plotly)
#transpose the dataset (required for PCA)
data.pca<-t(LFQs.Heart_raw)
data.pca<-as.data.frame(data.pca)
##Import the data to be used for annotation
Index<-c(0,0,0,0,1,1,1,1)
Index<-as.data.frame(Index)
##merge the file
data.pca_Final<-cbind(Index, data.pca)
rownames(data.pca_Final)<-data.pca_Final$Row.names
pca.comp<-prcomp(data.pca_Final[,(ncol(Index)+2):ncol(data.pca_Final)])
pcaCharts=function(x) {
x.var <- x$sdev ^ 2
x.pvar <- x.var/sum(x.var)
par(mfrow=c(2,2))
plot(x.pvar,xlab="Principal component",
ylab="Proportion of variance", ylim=c(0,1), type='b')
plot(cumsum(x.pvar),xlab="Principal component",
ylab="Cumulative Proportion of variance",
ylim=c(0,1),
type='b')
screeplot(x)
screeplot(x,type="l")
par(mfrow=c(1,1))
}
pcaCharts(pca.comp)
png(file=paste0("../2_Output/2_Protein/Proteomics_PCA.Charts.png"))
pcaCharts(pca.comp)
dev.off()
## quartz_off_screen
## 2
From the previous calculations, it is appears that 3 principal components are necessary (accounting for >80% cumulative variance).
##Create a 3D-PCA for Inspection
library(plotly)
##Index
PCs<-cbind(pca.comp$x, Index)
rownames(PCs)<-rownames(data.pca)
ax_text<-list(
family = "times",
size = 12,
color = "black")
t <- list(
family = "times",
size = 14,
color = "black")
p <- plot_ly(PCs, x = ~PC1, y = ~PC2, z = ~PC3,
marker = list(color = ~Index,
colorscale = c('#FFE1A1', '#683531'),
showscale = TRUE),
text=rownames(PCs)) %>%
add_markers() %>%
layout(scene = list(
xaxis = list(title = 'PC1', zerolinewidth = 4,
zerolinecolor="darkgrey", linecolor="darkgrey",
linewidth=4, titlefont=t, tickfont=ax_text),
yaxis = list(title = 'PC2', zerolinewidth = 4,
zerolinecolor="darkgrey", linecolor="darkgrey",
linewidth=4, titlefont=t, tickfont=ax_text),
zaxis = list(title = 'PC3', zerolinewidth = 4,
zerolinecolor="darkgrey", linecolor="darkgrey",
linewidth=4, titlefont=t, tickfont=ax_text)),
annotations = list(
x = 1.13,
y = 1.03,
text = 'Diabetes',
xref = '1',
yref = '0',
showarrow = FALSE))
p #must comment out for PDF generation via knitr (Pandoc)
library(pheatmap)
library(dplyr)
##Import Data Matrix
Results_HM<-dplyr::filter(Heart_raw.complete.cleaned, minuslog_pval>2)
HM_data.p05<-data.matrix(dplyr::select(Results_HM, contains("LFQ")))
rownames(HM_data.p05)<-Results_HM$Gene.Symbol
#Import the Index File
paletteLength <- 100
myColor <- colorRampPalette(c("dodgerblue4", "white", "brown4"))(paletteLength)
pheatmap(HM_data.p05, color = myColor, scale = "row")
Heatmap and Unsupervised Hierarchical Clustering of Proteomics in Adipor1 knockout relative to wild-type.
pheatmap(HM_data.p05, color = myColor, scale = "row", filename = "../2_Output/2_Protein/Heatmap.Heart_LFQs.p01.pdf")
# Load packages
library(dplyr)
library(ggplot2)
library(ggrepel)
library(openxlsx)
# Read data from the web
results<-read.xlsx("../1_Input/2_Protein/Heart - PN-0064 Heart - mep.xlsx", sheet = "IPA_Import")
results = mutate(results, sig=ifelse(results$minuslogpvalue>1.3 & abs(results$log2FC)>0.585, "p < 0.05 and |FC| > 1.5", "Not Sig"))
#plot the ggplot
p = ggplot(results, aes(log2FC, minuslogpvalue)) + theme(panel.background = element_rect("white", colour = "black", size=2), panel.grid.major = element_line(colour = "gray50", size=.75), panel.grid.minor = element_line(colour = "gray50", size=0.4)) +
geom_point(aes(fill=sig), colour="black", shape=21) + labs(x=expression(Log[2](Fold-Change)), y=expression(-Log[10](P-value))) + xlim(-5,5)+ geom_hline(yintercept = 0, size = 1) + geom_vline(xintercept=0, size=1)+
scale_fill_manual(values=c("black", "tomato"))
#add a repelling effect to the text labels.
p+geom_text_repel(data=filter(results, minuslogpvalue>2 & abs(log2FC)>2), aes(label=Gene))
pdf(file = "../2_Output/2_Protein/Volcano.Plot_Heart.pdf")
p+geom_text_repel(data=filter(results, minuslogpvalue>2 & abs(log2FC)>1), aes(label=Gene))
dev.off()
## quartz_off_screen
## 2
# Load the packages used for the network analysis
library("igraph")
library("network")
library("sna")
library("ndtv")
library("openxlsx")
library(tidyr)
library(dplyr)
# Load the data used for the network analysis
links <- read.xlsx("../1_Input/Pathways/Adiponectin_Network_AA.ONLY_ICM.v.NICM.xlsx")
links<- links %>% mutate(From = strsplit(as.character(From), split = ";")) %>% unnest(From)
links<- links %>% mutate(To = strsplit(as.character(To), split = ";")) %>% unnest(To)
links<-unique(links[,2:3])
nodes <- read.xlsx("../1_Input/1_RNA/LRT-Ischemia-Race Interaction_DESeq2.xlsx", sheet = "Unfiltered")
nodes<-dplyr::mutate(nodes, Logpvalue=-log(pvalue, 10))
nodes<-dplyr::select(nodes, -pvalue)
nodes<-nodes %>% mutate(Gene = strsplit(as.character(external_gene_name), split = ";")) %>% unnest(Gene)
nodes$Gene<-make.unique(nodes$Gene, sep = ".")
nodes<-nodes[!is.na(nodes$Gene),]
nodes<-dplyr::select(nodes, id=Gene, log2FoldChange, Logpvalue)
nodes_To<-merge(nodes, links, by.x = "id", by.y = "To")
nodes_To<-dplyr::select(nodes_To, -"From")
nodes_From<-merge(nodes, links, by.x = "id", by.y = "From")
nodes_From<-dplyr::select(nodes_From, -To)
nodes_both<-rbind(nodes_To, nodes_From)
nodes_filtered<-distinct(nodes_both)
#create colors for nodes based on fold-change
nodes_filtered = mutate(nodes_filtered, colrs=ifelse(nodes_filtered$log2FoldChange>0, "goldenrod1", "dodgerblue"))
# Examine the data
head(nodes)
## id log2FoldChange Logpvalue
## 1 TSPAN6 -0.174198936 0.89967284
## 2 TNMD 1.367731837 4.09888460
## 3 DPM1 -0.072854878 0.03328028
## 4 SCYL3 0.148501007 0.48531617
## 5 C1orf112 0.008380419 0.35216380
## 6 FGR 1.141066575 1.43322194
head(links)
## From To
## 1 ACACB ACACB
## 2 ACACB LEP
## 3 ACACB MLXIPL
## 4 ACACB PPARG
## 5 ACACB SCD
## 6 ADIPOQ CASP3
# since there are a few repeated links, we can consolidate them
links<-links[order(links$From, links$To),]
rownames(links)<-NULL
library("igraph")
net<-graph.data.frame(links, nodes_filtered, directed=T)
net
## IGRAPH 18eb837 DN-- 24 134 --
## + attr: name (v/c), log2FoldChange (v/n), Logpvalue (v/n), colrs
## | (v/c)
## + edges from 18eb837 (vertex names):
## [1] ACACB ->ACACB ACACB ->LEP ACACB ->MLXIPL ACACB ->PPARG
## [5] ACACB ->SCD ADIPOQ->ACACB ADIPOQ->ADIPOQ ADIPOQ->CASP3
## [9] ADIPOQ->CEBPA ADIPOQ->CTGF ADIPOQ->GPD1 ADIPOQ->HMGCS2
## [13] ADIPOQ->LEP ADIPOQ->LHCGR ADIPOQ->MLXIPL ADIPOQ->MMP9
## [17] ADIPOQ->PCK1 ADIPOQ->PFKFB1 ADIPOQ->PPARG ADIPOQ->SCD
## [21] ADIPOQ->UCP1 ADIPOQ->UCP2 AGT ->ADIPOQ AGT ->AGT
## [25] AGT ->CASP3 AGT ->CTGF AGT ->IGF1 AGT ->LEP
## + ... omitted several edges
net<-simplify(net, remove.multiple = F, remove.loops = T)
V(net)$color<-nodes_filtered$colrs #change the node colors based on the fold-change (red = up)
V(net)$size<-abs(V(net)$log2FoldChange)*5
#Create a layout format
l <- layout_with_kk(net)
l <- norm_coords(l, ymin=-1, ymax=1, xmin=-1, xmax=1)
#Highlight the ADIPOQ Targets
inc.edges <- incident(net, V(net)[[2]], mode="all")
ecol <- rep("gray80", ecount(net))
ecol[inc.edges] <- "black"
#plot the network
plot(net, edge.arrow.size=0.4, label.cex=2, vertex.label.color="black", vertex.label.cex=.7, vertex.frame.color=NA, layout = l, rescale=F, edge.color=ecol)
legend(x=-1.5, y=-1.1, c("Upregulated", "Downregulated"), pch=21, col=c("goldenrod1", "dodgerblue"), pt.bg = c("goldenrod1", "dodgerblue"), pt.cex=2, cex = 0.8, bty = "n", ncol = 1)
##Create a pdf file of the network
pdf(file="../2_Output/1_RNA/ADIPOQ_Gene.Network.pdf")
plot(net, edge.arrow.size=0.4, label.cex=2, vertex.label.color="black", vertex.label.cex=.7, edge.color="gray50", vertex.frame.color=NA, layout = l, rescale=F, , edge.color = ecol)
legend(x=-1.5, y=-1.1, c("Upregulated", "Downregulated"), pch=21, col=c("goldenrod1", "dodgerblue"), pt.bg = c("goldenrod1", "dodgerblue"), pt.cex=2, cex = 0.8, bty = "n", ncol = 1)
dev.off()
## quartz_off_screen
## 2
tkid <- tkplot(net) #tkid is the id of the tkplot that will open
l <- tkplot.getcoords(tkid) # grab the coordinates from tkplot
plot(net, layout=l)